{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 129,
   "id": "a28539a3",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 130,
   "id": "808ac0f9",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>member_id</th>\n",
       "      <th>loan_amnt</th>\n",
       "      <th>funded_amnt</th>\n",
       "      <th>funded_amnt_inv</th>\n",
       "      <th>term</th>\n",
       "      <th>int_rate</th>\n",
       "      <th>installment</th>\n",
       "      <th>grade</th>\n",
       "      <th>sub_grade</th>\n",
       "      <th>...</th>\n",
       "      <th>hardship_payoff_balance_amount</th>\n",
       "      <th>hardship_last_payment_amount</th>\n",
       "      <th>disbursement_method</th>\n",
       "      <th>debt_settlement_flag</th>\n",
       "      <th>debt_settlement_flag_date</th>\n",
       "      <th>settlement_status</th>\n",
       "      <th>settlement_date</th>\n",
       "      <th>settlement_amount</th>\n",
       "      <th>settlement_percentage</th>\n",
       "      <th>settlement_term</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>18500.0</td>\n",
       "      <td>18500.0</td>\n",
       "      <td>18500.0</td>\n",
       "      <td>36 months</td>\n",
       "      <td>13.58%</td>\n",
       "      <td>628.52</td>\n",
       "      <td>C</td>\n",
       "      <td>C2</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Cash</td>\n",
       "      <td>N</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>7000.0</td>\n",
       "      <td>7000.0</td>\n",
       "      <td>7000.0</td>\n",
       "      <td>36 months</td>\n",
       "      <td>7.34%</td>\n",
       "      <td>217.23</td>\n",
       "      <td>A</td>\n",
       "      <td>A4</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Cash</td>\n",
       "      <td>N</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>16000.0</td>\n",
       "      <td>16000.0</td>\n",
       "      <td>16000.0</td>\n",
       "      <td>60 months</td>\n",
       "      <td>11.98%</td>\n",
       "      <td>355.75</td>\n",
       "      <td>B</td>\n",
       "      <td>B5</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Cash</td>\n",
       "      <td>N</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>28000.0</td>\n",
       "      <td>28000.0</td>\n",
       "      <td>28000.0</td>\n",
       "      <td>36 months</td>\n",
       "      <td>10.90%</td>\n",
       "      <td>915.36</td>\n",
       "      <td>B</td>\n",
       "      <td>B4</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Cash</td>\n",
       "      <td>N</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>8300.0</td>\n",
       "      <td>8300.0</td>\n",
       "      <td>8300.0</td>\n",
       "      <td>36 months</td>\n",
       "      <td>7.34%</td>\n",
       "      <td>257.58</td>\n",
       "      <td>A</td>\n",
       "      <td>A4</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Cash</td>\n",
       "      <td>N</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>107861</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4375.0</td>\n",
       "      <td>4375.0</td>\n",
       "      <td>4375.0</td>\n",
       "      <td>36 months</td>\n",
       "      <td>14.08%</td>\n",
       "      <td>149.70</td>\n",
       "      <td>C</td>\n",
       "      <td>C3</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Cash</td>\n",
       "      <td>N</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>107862</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>12000.0</td>\n",
       "      <td>12000.0</td>\n",
       "      <td>11975.0</td>\n",
       "      <td>36 months</td>\n",
       "      <td>10.42%</td>\n",
       "      <td>389.58</td>\n",
       "      <td>B</td>\n",
       "      <td>B3</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Cash</td>\n",
       "      <td>N</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>107863</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>14000.0</td>\n",
       "      <td>14000.0</td>\n",
       "      <td>13975.0</td>\n",
       "      <td>36 months</td>\n",
       "      <td>13.59%</td>\n",
       "      <td>475.71</td>\n",
       "      <td>C</td>\n",
       "      <td>C2</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Cash</td>\n",
       "      <td>N</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>107864</th>\n",
       "      <td>Total amount funded in policy code 1: 1741781700</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>107865</th>\n",
       "      <td>Total amount funded in policy code 2: 539397275</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>215732 rows × 145 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                      id  member_id  \\\n",
       "0                                                    NaN        NaN   \n",
       "1                                                    NaN        NaN   \n",
       "2                                                    NaN        NaN   \n",
       "3                                                    NaN        NaN   \n",
       "4                                                    NaN        NaN   \n",
       "...                                                  ...        ...   \n",
       "107861                                               NaN        NaN   \n",
       "107862                                               NaN        NaN   \n",
       "107863                                               NaN        NaN   \n",
       "107864  Total amount funded in policy code 1: 1741781700        NaN   \n",
       "107865   Total amount funded in policy code 2: 539397275        NaN   \n",
       "\n",
       "        loan_amnt  funded_amnt  funded_amnt_inv        term int_rate  \\\n",
       "0         18500.0      18500.0          18500.0   36 months   13.58%   \n",
       "1          7000.0       7000.0           7000.0   36 months    7.34%   \n",
       "2         16000.0      16000.0          16000.0   60 months   11.98%   \n",
       "3         28000.0      28000.0          28000.0   36 months   10.90%   \n",
       "4          8300.0       8300.0           8300.0   36 months    7.34%   \n",
       "...           ...          ...              ...         ...      ...   \n",
       "107861     4375.0       4375.0           4375.0   36 months   14.08%   \n",
       "107862    12000.0      12000.0          11975.0   36 months   10.42%   \n",
       "107863    14000.0      14000.0          13975.0   36 months   13.59%   \n",
       "107864        NaN          NaN              NaN         NaN      NaN   \n",
       "107865        NaN          NaN              NaN         NaN      NaN   \n",
       "\n",
       "        installment grade sub_grade  ... hardship_payoff_balance_amount  \\\n",
       "0            628.52     C        C2  ...                            NaN   \n",
       "1            217.23     A        A4  ...                            NaN   \n",
       "2            355.75     B        B5  ...                            NaN   \n",
       "3            915.36     B        B4  ...                            NaN   \n",
       "4            257.58     A        A4  ...                            NaN   \n",
       "...             ...   ...       ...  ...                            ...   \n",
       "107861       149.70     C        C3  ...                            NaN   \n",
       "107862       389.58     B        B3  ...                            NaN   \n",
       "107863       475.71     C        C2  ...                            NaN   \n",
       "107864          NaN   NaN       NaN  ...                            NaN   \n",
       "107865          NaN   NaN       NaN  ...                            NaN   \n",
       "\n",
       "       hardship_last_payment_amount disbursement_method  debt_settlement_flag  \\\n",
       "0                               NaN                Cash                     N   \n",
       "1                               NaN                Cash                     N   \n",
       "2                               NaN                Cash                     N   \n",
       "3                               NaN                Cash                     N   \n",
       "4                               NaN                Cash                     N   \n",
       "...                             ...                 ...                   ...   \n",
       "107861                          NaN                Cash                     N   \n",
       "107862                          NaN                Cash                     N   \n",
       "107863                          NaN                Cash                     N   \n",
       "107864                          NaN                 NaN                   NaN   \n",
       "107865                          NaN                 NaN                   NaN   \n",
       "\n",
       "       debt_settlement_flag_date settlement_status settlement_date  \\\n",
       "0                            NaN               NaN             NaN   \n",
       "1                            NaN               NaN             NaN   \n",
       "2                            NaN               NaN             NaN   \n",
       "3                            NaN               NaN             NaN   \n",
       "4                            NaN               NaN             NaN   \n",
       "...                          ...               ...             ...   \n",
       "107861                       NaN               NaN             NaN   \n",
       "107862                       NaN               NaN             NaN   \n",
       "107863                       NaN               NaN             NaN   \n",
       "107864                       NaN               NaN             NaN   \n",
       "107865                       NaN               NaN             NaN   \n",
       "\n",
       "       settlement_amount  settlement_percentage  settlement_term  \n",
       "0                    NaN                    NaN              NaN  \n",
       "1                    NaN                    NaN              NaN  \n",
       "2                    NaN                    NaN              NaN  \n",
       "3                    NaN                    NaN              NaN  \n",
       "4                    NaN                    NaN              NaN  \n",
       "...                  ...                    ...              ...  \n",
       "107861               NaN                    NaN              NaN  \n",
       "107862               NaN                    NaN              NaN  \n",
       "107863               NaN                    NaN              NaN  \n",
       "107864               NaN                    NaN              NaN  \n",
       "107865               NaN                    NaN              NaN  \n",
       "\n",
       "[215732 rows x 145 columns]"
      ]
     },
     "execution_count": 130,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df1 = pd.read_csv(\"./LoanStats/LoanStats_2018Q1.csv\",skiprows=1,low_memory=False)\n",
    "df2 = pd.read_csv(\"./LoanStats/LoanStats_2018Q1.csv\",skiprows=1,low_memory=False)\n",
    "df = pd.concat([df1,df2],axis=0)\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 131,
   "id": "c1de5c00",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Int64Index: 215732 entries, 0 to 107865\n",
      "Columns: 145 entries, id to settlement_term\n",
      "dtypes: float64(114), object(31)\n",
      "memory usage: 240.3+ MB\n"
     ]
    }
   ],
   "source": [
    "df.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 132,
   "id": "b31fddf3",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'ANY', 'MORTGAGE', 'OWN', 'RENT', nan}"
      ]
     },
     "execution_count": 132,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "set(df.home_ownership)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 133,
   "id": "c0b814ae",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>loan_amnt</th>\n",
       "      <th>term</th>\n",
       "      <th>int_rate</th>\n",
       "      <th>grade</th>\n",
       "      <th>home_ownership</th>\n",
       "      <th>emp_length</th>\n",
       "      <th>loan_status</th>\n",
       "      <th>disbursement_method</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>18500.0</td>\n",
       "      <td>36 months</td>\n",
       "      <td>13.58%</td>\n",
       "      <td>C</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>6 years</td>\n",
       "      <td>Current</td>\n",
       "      <td>Cash</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>7000.0</td>\n",
       "      <td>36 months</td>\n",
       "      <td>7.34%</td>\n",
       "      <td>A</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>10+ years</td>\n",
       "      <td>Current</td>\n",
       "      <td>Cash</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>16000.0</td>\n",
       "      <td>60 months</td>\n",
       "      <td>11.98%</td>\n",
       "      <td>B</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>5 years</td>\n",
       "      <td>Current</td>\n",
       "      <td>Cash</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>28000.0</td>\n",
       "      <td>36 months</td>\n",
       "      <td>10.90%</td>\n",
       "      <td>B</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>6 years</td>\n",
       "      <td>Current</td>\n",
       "      <td>Cash</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>8300.0</td>\n",
       "      <td>36 months</td>\n",
       "      <td>7.34%</td>\n",
       "      <td>A</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>4 years</td>\n",
       "      <td>Current</td>\n",
       "      <td>Cash</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   loan_amnt        term int_rate grade home_ownership emp_length loan_status  \\\n",
       "0    18500.0   36 months   13.58%     C       MORTGAGE    6 years     Current   \n",
       "1     7000.0   36 months    7.34%     A       MORTGAGE  10+ years     Current   \n",
       "2    16000.0   60 months   11.98%     B       MORTGAGE    5 years     Current   \n",
       "3    28000.0   36 months   10.90%     B       MORTGAGE    6 years     Current   \n",
       "4     8300.0   36 months    7.34%     A       MORTGAGE    4 years     Current   \n",
       "\n",
       "  disbursement_method  \n",
       "0                Cash  \n",
       "1                Cash  \n",
       "2                Cash  \n",
       "3                Cash  \n",
       "4                Cash  "
      ]
     },
     "execution_count": 133,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = df[['loan_amnt','term','int_rate','grade','home_ownership','emp_length','loan_status','disbursement_method']]\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 134,
   "id": "74ac70bc",
   "metadata": {},
   "outputs": [],
   "source": [
    "df = df.dropna(axis=0,how='any')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 135,
   "id": "f04d63da",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'Charged Off',\n",
       " 'Current',\n",
       " 'Default',\n",
       " 'Fully Paid',\n",
       " 'In Grace Period',\n",
       " 'Late (16-30 days)',\n",
       " 'Late (31-120 days)'}"
      ]
     },
     "execution_count": 135,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "set(df.loan_status)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 136,
   "id": "12b02136",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>loan_amnt</th>\n",
       "      <th>term</th>\n",
       "      <th>int_rate</th>\n",
       "      <th>grade</th>\n",
       "      <th>home_ownership</th>\n",
       "      <th>emp_length</th>\n",
       "      <th>loan_status</th>\n",
       "      <th>disbursement_method</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2334</th>\n",
       "      <td>12000.0</td>\n",
       "      <td>60 months</td>\n",
       "      <td>17.47%</td>\n",
       "      <td>D</td>\n",
       "      <td>OWN</td>\n",
       "      <td>3 years</td>\n",
       "      <td>Charged Off</td>\n",
       "      <td>Cash</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7129</th>\n",
       "      <td>35000.0</td>\n",
       "      <td>60 months</td>\n",
       "      <td>18.45%</td>\n",
       "      <td>D</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>3 years</td>\n",
       "      <td>Charged Off</td>\n",
       "      <td>Cash</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11729</th>\n",
       "      <td>1500.0</td>\n",
       "      <td>36 months</td>\n",
       "      <td>20.39%</td>\n",
       "      <td>D</td>\n",
       "      <td>RENT</td>\n",
       "      <td>2 years</td>\n",
       "      <td>Charged Off</td>\n",
       "      <td>Cash</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11971</th>\n",
       "      <td>8000.0</td>\n",
       "      <td>36 months</td>\n",
       "      <td>9.43%</td>\n",
       "      <td>B</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>10+ years</td>\n",
       "      <td>Charged Off</td>\n",
       "      <td>Cash</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15352</th>\n",
       "      <td>10000.0</td>\n",
       "      <td>60 months</td>\n",
       "      <td>10.90%</td>\n",
       "      <td>B</td>\n",
       "      <td>RENT</td>\n",
       "      <td>10+ years</td>\n",
       "      <td>Charged Off</td>\n",
       "      <td>DirectPay</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>105787</th>\n",
       "      <td>25175.0</td>\n",
       "      <td>36 months</td>\n",
       "      <td>21.45%</td>\n",
       "      <td>D</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>5 years</td>\n",
       "      <td>Charged Off</td>\n",
       "      <td>Cash</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>106276</th>\n",
       "      <td>10000.0</td>\n",
       "      <td>36 months</td>\n",
       "      <td>10.42%</td>\n",
       "      <td>B</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>8 years</td>\n",
       "      <td>Charged Off</td>\n",
       "      <td>Cash</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>107239</th>\n",
       "      <td>10000.0</td>\n",
       "      <td>36 months</td>\n",
       "      <td>11.99%</td>\n",
       "      <td>B</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>10+ years</td>\n",
       "      <td>Charged Off</td>\n",
       "      <td>Cash</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>107558</th>\n",
       "      <td>32200.0</td>\n",
       "      <td>60 months</td>\n",
       "      <td>16.02%</td>\n",
       "      <td>C</td>\n",
       "      <td>RENT</td>\n",
       "      <td>3 years</td>\n",
       "      <td>Charged Off</td>\n",
       "      <td>Cash</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>107810</th>\n",
       "      <td>35175.0</td>\n",
       "      <td>36 months</td>\n",
       "      <td>17.09%</td>\n",
       "      <td>D</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>10+ years</td>\n",
       "      <td>Charged Off</td>\n",
       "      <td>Cash</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>174 rows × 8 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        loan_amnt        term int_rate grade home_ownership emp_length  \\\n",
       "2334      12000.0   60 months   17.47%     D            OWN    3 years   \n",
       "7129      35000.0   60 months   18.45%     D       MORTGAGE    3 years   \n",
       "11729      1500.0   36 months   20.39%     D           RENT    2 years   \n",
       "11971      8000.0   36 months    9.43%     B       MORTGAGE  10+ years   \n",
       "15352     10000.0   60 months   10.90%     B           RENT  10+ years   \n",
       "...           ...         ...      ...   ...            ...        ...   \n",
       "105787    25175.0   36 months   21.45%     D       MORTGAGE    5 years   \n",
       "106276    10000.0   36 months   10.42%     B       MORTGAGE    8 years   \n",
       "107239    10000.0   36 months   11.99%     B       MORTGAGE  10+ years   \n",
       "107558    32200.0   60 months   16.02%     C           RENT    3 years   \n",
       "107810    35175.0   36 months   17.09%     D       MORTGAGE  10+ years   \n",
       "\n",
       "        loan_status disbursement_method  \n",
       "2334    Charged Off                Cash  \n",
       "7129    Charged Off                Cash  \n",
       "11729   Charged Off                Cash  \n",
       "11971   Charged Off                Cash  \n",
       "15352   Charged Off           DirectPay  \n",
       "...             ...                 ...  \n",
       "105787  Charged Off                Cash  \n",
       "106276  Charged Off                Cash  \n",
       "107239  Charged Off                Cash  \n",
       "107558  Charged Off                Cash  \n",
       "107810  Charged Off                Cash  \n",
       "\n",
       "[174 rows x 8 columns]"
      ]
     },
     "execution_count": 136,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[df['loan_status']=='Charged Off']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 137,
   "id": "a94d270c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'A', 'B', 'C', 'D', 'E', 'F', 'G'}"
      ]
     },
     "execution_count": 137,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "set(df.grade)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 138,
   "id": "1c99e107",
   "metadata": {},
   "outputs": [],
   "source": [
    "def getLabel(x):\n",
    "    if(x=='Charged Off' or x=='Default' or x=='Late (16-30 days)' or x=='Late (31-120 days)'):\n",
    "        return 1\n",
    "    else:\n",
    "        return 0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 139,
   "id": "a940f983",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0         0\n",
       "1         0\n",
       "2         0\n",
       "3         0\n",
       "4         0\n",
       "         ..\n",
       "107859    0\n",
       "107860    0\n",
       "107861    0\n",
       "107862    0\n",
       "107863    0\n",
       "Name: label, Length: 196872, dtype: int64"
      ]
     },
     "execution_count": 139,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['label'] = df.loan_status.apply(lambda x:getLabel(x))\n",
    "df.label"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 140,
   "id": "d445bf52",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{0, 1}"
      ]
     },
     "execution_count": 140,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "set(df.label)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 141,
   "id": "944992f4",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>loan_amnt</th>\n",
       "      <th>term</th>\n",
       "      <th>int_rate</th>\n",
       "      <th>grade</th>\n",
       "      <th>home_ownership</th>\n",
       "      <th>emp_length</th>\n",
       "      <th>loan_status</th>\n",
       "      <th>disbursement_method</th>\n",
       "      <th>label</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>214</th>\n",
       "      <td>6000.0</td>\n",
       "      <td>36 months</td>\n",
       "      <td>20.39%</td>\n",
       "      <td>D</td>\n",
       "      <td>RENT</td>\n",
       "      <td>2 years</td>\n",
       "      <td>Late (31-120 days)</td>\n",
       "      <td>Cash</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>276</th>\n",
       "      <td>12000.0</td>\n",
       "      <td>60 months</td>\n",
       "      <td>11.98%</td>\n",
       "      <td>B</td>\n",
       "      <td>OWN</td>\n",
       "      <td>10+ years</td>\n",
       "      <td>Late (31-120 days)</td>\n",
       "      <td>Cash</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>499</th>\n",
       "      <td>6000.0</td>\n",
       "      <td>36 months</td>\n",
       "      <td>9.43%</td>\n",
       "      <td>B</td>\n",
       "      <td>RENT</td>\n",
       "      <td>6 years</td>\n",
       "      <td>Late (31-120 days)</td>\n",
       "      <td>Cash</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>547</th>\n",
       "      <td>5000.0</td>\n",
       "      <td>36 months</td>\n",
       "      <td>18.45%</td>\n",
       "      <td>D</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>1 year</td>\n",
       "      <td>Late (31-120 days)</td>\n",
       "      <td>Cash</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>655</th>\n",
       "      <td>1500.0</td>\n",
       "      <td>36 months</td>\n",
       "      <td>20.39%</td>\n",
       "      <td>D</td>\n",
       "      <td>RENT</td>\n",
       "      <td>6 years</td>\n",
       "      <td>Late (31-120 days)</td>\n",
       "      <td>Cash</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>107558</th>\n",
       "      <td>32200.0</td>\n",
       "      <td>60 months</td>\n",
       "      <td>16.02%</td>\n",
       "      <td>C</td>\n",
       "      <td>RENT</td>\n",
       "      <td>3 years</td>\n",
       "      <td>Charged Off</td>\n",
       "      <td>Cash</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>107571</th>\n",
       "      <td>10000.0</td>\n",
       "      <td>36 months</td>\n",
       "      <td>9.44%</td>\n",
       "      <td>B</td>\n",
       "      <td>RENT</td>\n",
       "      <td>&lt; 1 year</td>\n",
       "      <td>Late (31-120 days)</td>\n",
       "      <td>Cash</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>107755</th>\n",
       "      <td>16000.0</td>\n",
       "      <td>36 months</td>\n",
       "      <td>20.00%</td>\n",
       "      <td>D</td>\n",
       "      <td>RENT</td>\n",
       "      <td>2 years</td>\n",
       "      <td>Late (31-120 days)</td>\n",
       "      <td>Cash</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>107810</th>\n",
       "      <td>35175.0</td>\n",
       "      <td>36 months</td>\n",
       "      <td>17.09%</td>\n",
       "      <td>D</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>10+ years</td>\n",
       "      <td>Charged Off</td>\n",
       "      <td>Cash</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>107819</th>\n",
       "      <td>10000.0</td>\n",
       "      <td>36 months</td>\n",
       "      <td>16.02%</td>\n",
       "      <td>C</td>\n",
       "      <td>RENT</td>\n",
       "      <td>5 years</td>\n",
       "      <td>Late (16-30 days)</td>\n",
       "      <td>Cash</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2392 rows × 9 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        loan_amnt        term int_rate grade home_ownership emp_length  \\\n",
       "214        6000.0   36 months   20.39%     D           RENT    2 years   \n",
       "276       12000.0   60 months   11.98%     B            OWN  10+ years   \n",
       "499        6000.0   36 months    9.43%     B           RENT    6 years   \n",
       "547        5000.0   36 months   18.45%     D       MORTGAGE     1 year   \n",
       "655        1500.0   36 months   20.39%     D           RENT    6 years   \n",
       "...           ...         ...      ...   ...            ...        ...   \n",
       "107558    32200.0   60 months   16.02%     C           RENT    3 years   \n",
       "107571    10000.0   36 months    9.44%     B           RENT   < 1 year   \n",
       "107755    16000.0   36 months   20.00%     D           RENT    2 years   \n",
       "107810    35175.0   36 months   17.09%     D       MORTGAGE  10+ years   \n",
       "107819    10000.0   36 months   16.02%     C           RENT    5 years   \n",
       "\n",
       "               loan_status disbursement_method  label  \n",
       "214     Late (31-120 days)                Cash      1  \n",
       "276     Late (31-120 days)                Cash      1  \n",
       "499     Late (31-120 days)                Cash      1  \n",
       "547     Late (31-120 days)                Cash      1  \n",
       "655     Late (31-120 days)                Cash      1  \n",
       "...                    ...                 ...    ...  \n",
       "107558         Charged Off                Cash      1  \n",
       "107571  Late (31-120 days)                Cash      1  \n",
       "107755  Late (31-120 days)                Cash      1  \n",
       "107810         Charged Off                Cash      1  \n",
       "107819   Late (16-30 days)                Cash      1  \n",
       "\n",
       "[2392 rows x 9 columns]"
      ]
     },
     "execution_count": 141,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.loc[df['label'] == 1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 142,
   "id": "2de2f029",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Int64Index: 196872 entries, 0 to 107863\n",
      "Data columns (total 9 columns):\n",
      " #   Column               Non-Null Count   Dtype  \n",
      "---  ------               --------------   -----  \n",
      " 0   loan_amnt            196872 non-null  float64\n",
      " 1   term                 196872 non-null  object \n",
      " 2   int_rate             196872 non-null  object \n",
      " 3   grade                196872 non-null  object \n",
      " 4   home_ownership       196872 non-null  object \n",
      " 5   emp_length           196872 non-null  object \n",
      " 6   loan_status          196872 non-null  object \n",
      " 7   disbursement_method  196872 non-null  object \n",
      " 8   label                196872 non-null  int64  \n",
      "dtypes: float64(1), int64(1), object(7)\n",
      "memory usage: 15.0+ MB\n"
     ]
    }
   ],
   "source": [
    "df.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 143,
   "id": "51691a78",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0         36\n",
       "1         36\n",
       "2         60\n",
       "3         36\n",
       "4         36\n",
       "          ..\n",
       "107859    36\n",
       "107860    36\n",
       "107861    36\n",
       "107862    36\n",
       "107863    36\n",
       "Name: term, Length: 196872, dtype: int32"
      ]
     },
     "execution_count": 143,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview only: leading three characters of term parsed as integers (nothing is assigned).\n",
    "df['term'].astype(str).str.slice(stop=3).astype(int)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 144,
   "id": "f5051c3a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep only the leading three characters of term and store them as integers.\n",
    "df['term'] = df['term'].astype(str).str.slice(stop=3).astype(int)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 145,
   "id": "54f4b89c",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Int64Index: 196872 entries, 0 to 107863\n",
      "Data columns (total 9 columns):\n",
      " #   Column               Non-Null Count   Dtype  \n",
      "---  ------               --------------   -----  \n",
      " 0   loan_amnt            196872 non-null  float64\n",
      " 1   term                 196872 non-null  int32  \n",
      " 2   int_rate             196872 non-null  object \n",
      " 3   grade                196872 non-null  object \n",
      " 4   home_ownership       196872 non-null  object \n",
      " 5   emp_length           196872 non-null  object \n",
      " 6   loan_status          196872 non-null  object \n",
      " 7   disbursement_method  196872 non-null  object \n",
      " 8   label                196872 non-null  int64  \n",
      "dtypes: float64(1), int32(1), int64(1), object(6)\n",
      "memory usage: 14.3+ MB\n"
     ]
    }
   ],
   "source": [
    "df.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 146,
   "id": "0bbbb41e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>loan_amnt</th>\n",
       "      <th>term</th>\n",
       "      <th>int_rate</th>\n",
       "      <th>grade</th>\n",
       "      <th>home_ownership</th>\n",
       "      <th>emp_length</th>\n",
       "      <th>loan_status</th>\n",
       "      <th>disbursement_method</th>\n",
       "      <th>label</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>18500.0</td>\n",
       "      <td>36</td>\n",
       "      <td>13.58</td>\n",
       "      <td>C</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>6 years</td>\n",
       "      <td>Current</td>\n",
       "      <td>Cash</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>7000.0</td>\n",
       "      <td>36</td>\n",
       "      <td>7.34</td>\n",
       "      <td>A</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>10+ years</td>\n",
       "      <td>Current</td>\n",
       "      <td>Cash</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>16000.0</td>\n",
       "      <td>60</td>\n",
       "      <td>11.98</td>\n",
       "      <td>B</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>5 years</td>\n",
       "      <td>Current</td>\n",
       "      <td>Cash</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>28000.0</td>\n",
       "      <td>36</td>\n",
       "      <td>10.90</td>\n",
       "      <td>B</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>6 years</td>\n",
       "      <td>Current</td>\n",
       "      <td>Cash</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>8300.0</td>\n",
       "      <td>36</td>\n",
       "      <td>7.34</td>\n",
       "      <td>A</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>4 years</td>\n",
       "      <td>Current</td>\n",
       "      <td>Cash</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>107859</th>\n",
       "      <td>5000.0</td>\n",
       "      <td>36</td>\n",
       "      <td>12.62</td>\n",
       "      <td>C</td>\n",
       "      <td>RENT</td>\n",
       "      <td>4 years</td>\n",
       "      <td>Current</td>\n",
       "      <td>Cash</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>107860</th>\n",
       "      <td>6000.0</td>\n",
       "      <td>36</td>\n",
       "      <td>10.91</td>\n",
       "      <td>B</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>10+ years</td>\n",
       "      <td>Current</td>\n",
       "      <td>Cash</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>107861</th>\n",
       "      <td>4375.0</td>\n",
       "      <td>36</td>\n",
       "      <td>14.08</td>\n",
       "      <td>C</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>10+ years</td>\n",
       "      <td>Current</td>\n",
       "      <td>Cash</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>107862</th>\n",
       "      <td>12000.0</td>\n",
       "      <td>36</td>\n",
       "      <td>10.42</td>\n",
       "      <td>B</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>8 years</td>\n",
       "      <td>Current</td>\n",
       "      <td>Cash</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>107863</th>\n",
       "      <td>14000.0</td>\n",
       "      <td>36</td>\n",
       "      <td>13.59</td>\n",
       "      <td>C</td>\n",
       "      <td>OWN</td>\n",
       "      <td>2 years</td>\n",
       "      <td>Fully Paid</td>\n",
       "      <td>Cash</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>196872 rows × 9 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        loan_amnt  term  int_rate grade home_ownership emp_length loan_status  \\\n",
       "0         18500.0    36     13.58     C       MORTGAGE    6 years     Current   \n",
       "1          7000.0    36      7.34     A       MORTGAGE  10+ years     Current   \n",
       "2         16000.0    60     11.98     B       MORTGAGE    5 years     Current   \n",
       "3         28000.0    36     10.90     B       MORTGAGE    6 years     Current   \n",
       "4          8300.0    36      7.34     A       MORTGAGE    4 years     Current   \n",
       "...           ...   ...       ...   ...            ...        ...         ...   \n",
       "107859     5000.0    36     12.62     C           RENT    4 years     Current   \n",
       "107860     6000.0    36     10.91     B       MORTGAGE  10+ years     Current   \n",
       "107861     4375.0    36     14.08     C       MORTGAGE  10+ years     Current   \n",
       "107862    12000.0    36     10.42     B       MORTGAGE    8 years     Current   \n",
       "107863    14000.0    36     13.59     C            OWN    2 years  Fully Paid   \n",
       "\n",
       "       disbursement_method  label  \n",
       "0                     Cash      0  \n",
       "1                     Cash      0  \n",
       "2                     Cash      0  \n",
       "3                     Cash      0  \n",
       "4                     Cash      0  \n",
       "...                    ...    ...  \n",
       "107859                Cash      0  \n",
       "107860                Cash      0  \n",
       "107861                Cash      0  \n",
       "107862                Cash      0  \n",
       "107863                Cash      0  \n",
       "\n",
       "[196872 rows x 9 columns]"
      ]
     },
     "execution_count": 146,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Convert int_rate to float. Only a trailing '%' is removed (rather than blindly\n",
    "# dropping the last character), so re-running this cell on values that are already\n",
    "# numeric does not chop off a digit.\n",
    "df['int_rate'] = df['int_rate'].astype(str).str.strip().str.rstrip('%').astype(float)\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 147,
   "id": "f83c7026",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>loan_amnt</th>\n",
       "      <th>term</th>\n",
       "      <th>int_rate</th>\n",
       "      <th>grade</th>\n",
       "      <th>home_ownership</th>\n",
       "      <th>emp_length</th>\n",
       "      <th>loan_status</th>\n",
       "      <th>disbursement_method</th>\n",
       "      <th>label</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>18500.0</td>\n",
       "      <td>36</td>\n",
       "      <td>13.58</td>\n",
       "      <td>C</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>6 years</td>\n",
       "      <td>Current</td>\n",
       "      <td>Cash</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>7000.0</td>\n",
       "      <td>36</td>\n",
       "      <td>7.34</td>\n",
       "      <td>A</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>10+ years</td>\n",
       "      <td>Current</td>\n",
       "      <td>Cash</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>16000.0</td>\n",
       "      <td>60</td>\n",
       "      <td>11.98</td>\n",
       "      <td>B</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>5 years</td>\n",
       "      <td>Current</td>\n",
       "      <td>Cash</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>28000.0</td>\n",
       "      <td>36</td>\n",
       "      <td>10.90</td>\n",
       "      <td>B</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>6 years</td>\n",
       "      <td>Current</td>\n",
       "      <td>Cash</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>8300.0</td>\n",
       "      <td>36</td>\n",
       "      <td>7.34</td>\n",
       "      <td>A</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>4 years</td>\n",
       "      <td>Current</td>\n",
       "      <td>Cash</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>107859</th>\n",
       "      <td>5000.0</td>\n",
       "      <td>36</td>\n",
       "      <td>12.62</td>\n",
       "      <td>C</td>\n",
       "      <td>RENT</td>\n",
       "      <td>4 years</td>\n",
       "      <td>Current</td>\n",
       "      <td>Cash</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>107860</th>\n",
       "      <td>6000.0</td>\n",
       "      <td>36</td>\n",
       "      <td>10.91</td>\n",
       "      <td>B</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>10+ years</td>\n",
       "      <td>Current</td>\n",
       "      <td>Cash</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>107861</th>\n",
       "      <td>4375.0</td>\n",
       "      <td>36</td>\n",
       "      <td>14.08</td>\n",
       "      <td>C</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>10+ years</td>\n",
       "      <td>Current</td>\n",
       "      <td>Cash</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>107862</th>\n",
       "      <td>12000.0</td>\n",
       "      <td>36</td>\n",
       "      <td>10.42</td>\n",
       "      <td>B</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>8 years</td>\n",
       "      <td>Current</td>\n",
       "      <td>Cash</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>107863</th>\n",
       "      <td>14000.0</td>\n",
       "      <td>36</td>\n",
       "      <td>13.59</td>\n",
       "      <td>C</td>\n",
       "      <td>OWN</td>\n",
       "      <td>2 years</td>\n",
       "      <td>Fully Paid</td>\n",
       "      <td>Cash</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>196872 rows × 9 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        loan_amnt  term  int_rate grade home_ownership emp_length loan_status  \\\n",
       "0         18500.0    36     13.58     C       MORTGAGE    6 years     Current   \n",
       "1          7000.0    36      7.34     A       MORTGAGE  10+ years     Current   \n",
       "2         16000.0    60     11.98     B       MORTGAGE    5 years     Current   \n",
       "3         28000.0    36     10.90     B       MORTGAGE    6 years     Current   \n",
       "4          8300.0    36      7.34     A       MORTGAGE    4 years     Current   \n",
       "...           ...   ...       ...   ...            ...        ...         ...   \n",
       "107859     5000.0    36     12.62     C           RENT    4 years     Current   \n",
       "107860     6000.0    36     10.91     B       MORTGAGE  10+ years     Current   \n",
       "107861     4375.0    36     14.08     C       MORTGAGE  10+ years     Current   \n",
       "107862    12000.0    36     10.42     B       MORTGAGE    8 years     Current   \n",
       "107863    14000.0    36     13.59     C            OWN    2 years  Fully Paid   \n",
       "\n",
       "       disbursement_method  label  \n",
       "0                     Cash      0  \n",
       "1                     Cash      0  \n",
       "2                     Cash      0  \n",
       "3                     Cash      0  \n",
       "4                     Cash      0  \n",
       "...                    ...    ...  \n",
       "107859                Cash      0  \n",
       "107860                Cash      0  \n",
       "107861                Cash      0  \n",
       "107862                Cash      0  \n",
       "107863                Cash      0  \n",
       "\n",
       "[196872 rows x 9 columns]"
      ]
     },
     "execution_count": 147,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 148,
   "id": "85a85b43",
   "metadata": {},
   "outputs": [],
   "source": [
    "def changeToValue(x):\n",
    "    \"\"\"Convert an emp_length value to a whole number of years.\n",
    "\n",
    "    Accepts the text forms handled below ('10+ years' -> 10, '< 1 year' -> 0,\n",
    "    'N year(s)' -> N) as well as values that are already integers, so applying\n",
    "    the function a second time leaves the data unchanged.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: if no integer can be read from the value (for example NaN).\n",
    "    \"\"\"\n",
    "    text = str(x).strip()\n",
    "    # Remove the unit word explicitly instead of slicing off a fixed number of\n",
    "    # characters, which breaks on surrounding whitespace or already-numeric input.\n",
    "    for unit in ('years', 'year'):\n",
    "        if text.endswith(unit):\n",
    "            text = text[:-len(unit)].strip()\n",
    "            break\n",
    "    if text == '10+':\n",
    "        return 10\n",
    "    if text == '< 1':\n",
    "        return 0\n",
    "    return int(text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 149,
   "id": "d292c2d4",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Replace each emp_length entry with the integer returned by changeToValue.\n",
    "df['emp_length'] = df['emp_length'].apply(changeToValue)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 150,
   "id": "855901f9",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>loan_amnt</th>\n",
       "      <th>term</th>\n",
       "      <th>int_rate</th>\n",
       "      <th>grade</th>\n",
       "      <th>home_ownership</th>\n",
       "      <th>emp_length</th>\n",
       "      <th>loan_status</th>\n",
       "      <th>disbursement_method</th>\n",
       "      <th>label</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>18500.0</td>\n",
       "      <td>36</td>\n",
       "      <td>13.58</td>\n",
       "      <td>C</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>6</td>\n",
       "      <td>Current</td>\n",
       "      <td>Cash</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>7000.0</td>\n",
       "      <td>36</td>\n",
       "      <td>7.34</td>\n",
       "      <td>A</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>10</td>\n",
       "      <td>Current</td>\n",
       "      <td>Cash</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>16000.0</td>\n",
       "      <td>60</td>\n",
       "      <td>11.98</td>\n",
       "      <td>B</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>5</td>\n",
       "      <td>Current</td>\n",
       "      <td>Cash</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>28000.0</td>\n",
       "      <td>36</td>\n",
       "      <td>10.90</td>\n",
       "      <td>B</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>6</td>\n",
       "      <td>Current</td>\n",
       "      <td>Cash</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>8300.0</td>\n",
       "      <td>36</td>\n",
       "      <td>7.34</td>\n",
       "      <td>A</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>4</td>\n",
       "      <td>Current</td>\n",
       "      <td>Cash</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   loan_amnt  term  int_rate grade home_ownership  emp_length loan_status  \\\n",
       "0    18500.0    36     13.58     C       MORTGAGE           6     Current   \n",
       "1     7000.0    36      7.34     A       MORTGAGE          10     Current   \n",
       "2    16000.0    60     11.98     B       MORTGAGE           5     Current   \n",
       "3    28000.0    36     10.90     B       MORTGAGE           6     Current   \n",
       "4     8300.0    36      7.34     A       MORTGAGE           4     Current   \n",
       "\n",
       "  disbursement_method  label  \n",
       "0                Cash      0  \n",
       "1                Cash      0  \n",
       "2                Cash      0  \n",
       "3                Cash      0  \n",
       "4                Cash      0  "
      ]
     },
     "execution_count": 150,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 151,
   "id": "8ba76350",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Int64Index: 196872 entries, 0 to 107863\n",
      "Data columns (total 9 columns):\n",
      " #   Column               Non-Null Count   Dtype  \n",
      "---  ------               --------------   -----  \n",
      " 0   loan_amnt            196872 non-null  float64\n",
      " 1   term                 196872 non-null  int32  \n",
      " 2   int_rate             196872 non-null  float64\n",
      " 3   grade                196872 non-null  object \n",
      " 4   home_ownership       196872 non-null  object \n",
      " 5   emp_length           196872 non-null  int64  \n",
      " 6   loan_status          196872 non-null  object \n",
      " 7   disbursement_method  196872 non-null  object \n",
      " 8   label                196872 non-null  int64  \n",
      "dtypes: float64(2), int32(1), int64(2), object(4)\n",
      "memory usage: 14.3+ MB\n"
     ]
    }
   ],
   "source": [
    "df.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 152,
   "id": "5a6d47cb",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'Cash', 'DirectPay'}"
      ]
     },
     "execution_count": 152,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "set(df['disbursement_method'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 153,
   "id": "b38d3efa",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Cash</th>\n",
       "      <th>DirectPay</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>107859</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>107860</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>107861</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>107862</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>107863</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>196872 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        Cash  DirectPay\n",
       "0          1          0\n",
       "1          1          0\n",
       "2          1          0\n",
       "3          1          0\n",
       "4          1          0\n",
       "...      ...        ...\n",
       "107859     1          0\n",
       "107860     1          0\n",
       "107861     1          0\n",
       "107862     1          0\n",
       "107863     1          0\n",
       "\n",
       "[196872 rows x 2 columns]"
      ]
     },
     "execution_count": 153,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.get_dummies(df.disbursement_method)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 155,
   "id": "c588f631",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Append one indicator column per disbursement_method value (Cash, DirectPay) to df.\n",
    "# Not idempotent: re-running while disbursement_method is still present appends the\n",
    "# indicator columns a second time.\n",
    "df = pd.concat([df,pd.get_dummies(df.disbursement_method)],axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 159,
   "id": "0fb71bb9",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>loan_amnt</th>\n",
       "      <th>term</th>\n",
       "      <th>int_rate</th>\n",
       "      <th>grade</th>\n",
       "      <th>home_ownership</th>\n",
       "      <th>emp_length</th>\n",
       "      <th>loan_status</th>\n",
       "      <th>disbursement_method</th>\n",
       "      <th>label</th>\n",
       "      <th>Cash</th>\n",
       "      <th>DirectPay</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>43</th>\n",
       "      <td>5000.0</td>\n",
       "      <td>36</td>\n",
       "      <td>5.31</td>\n",
       "      <td>A</td>\n",
       "      <td>RENT</td>\n",
       "      <td>2</td>\n",
       "      <td>Current</td>\n",
       "      <td>DirectPay</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>66</th>\n",
       "      <td>12000.0</td>\n",
       "      <td>60</td>\n",
       "      <td>13.58</td>\n",
       "      <td>C</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>10</td>\n",
       "      <td>Current</td>\n",
       "      <td>DirectPay</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>68</th>\n",
       "      <td>20000.0</td>\n",
       "      <td>36</td>\n",
       "      <td>5.31</td>\n",
       "      <td>A</td>\n",
       "      <td>OWN</td>\n",
       "      <td>5</td>\n",
       "      <td>Current</td>\n",
       "      <td>DirectPay</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>82</th>\n",
       "      <td>30000.0</td>\n",
       "      <td>36</td>\n",
       "      <td>11.98</td>\n",
       "      <td>B</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>4</td>\n",
       "      <td>Current</td>\n",
       "      <td>DirectPay</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>88</th>\n",
       "      <td>19000.0</td>\n",
       "      <td>60</td>\n",
       "      <td>18.45</td>\n",
       "      <td>D</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>10</td>\n",
       "      <td>Current</td>\n",
       "      <td>DirectPay</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>107717</th>\n",
       "      <td>22000.0</td>\n",
       "      <td>36</td>\n",
       "      <td>5.32</td>\n",
       "      <td>A</td>\n",
       "      <td>RENT</td>\n",
       "      <td>10</td>\n",
       "      <td>Current</td>\n",
       "      <td>DirectPay</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>107751</th>\n",
       "      <td>30000.0</td>\n",
       "      <td>36</td>\n",
       "      <td>17.09</td>\n",
       "      <td>D</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>4</td>\n",
       "      <td>Current</td>\n",
       "      <td>DirectPay</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>107797</th>\n",
       "      <td>20000.0</td>\n",
       "      <td>36</td>\n",
       "      <td>6.08</td>\n",
       "      <td>A</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>8</td>\n",
       "      <td>Current</td>\n",
       "      <td>DirectPay</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>107822</th>\n",
       "      <td>16000.0</td>\n",
       "      <td>36</td>\n",
       "      <td>7.97</td>\n",
       "      <td>A</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>10</td>\n",
       "      <td>Current</td>\n",
       "      <td>DirectPay</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>107854</th>\n",
       "      <td>18000.0</td>\n",
       "      <td>36</td>\n",
       "      <td>19.03</td>\n",
       "      <td>D</td>\n",
       "      <td>RENT</td>\n",
       "      <td>10</td>\n",
       "      <td>Current</td>\n",
       "      <td>DirectPay</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>13470 rows × 11 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        loan_amnt  term  int_rate grade home_ownership  emp_length  \\\n",
       "43         5000.0    36      5.31     A           RENT           2   \n",
       "66        12000.0    60     13.58     C       MORTGAGE          10   \n",
       "68        20000.0    36      5.31     A            OWN           5   \n",
       "82        30000.0    36     11.98     B       MORTGAGE           4   \n",
       "88        19000.0    60     18.45     D       MORTGAGE          10   \n",
       "...           ...   ...       ...   ...            ...         ...   \n",
       "107717    22000.0    36      5.32     A           RENT          10   \n",
       "107751    30000.0    36     17.09     D       MORTGAGE           4   \n",
       "107797    20000.0    36      6.08     A       MORTGAGE           8   \n",
       "107822    16000.0    36      7.97     A       MORTGAGE          10   \n",
       "107854    18000.0    36     19.03     D           RENT          10   \n",
       "\n",
       "       loan_status disbursement_method  label  Cash  DirectPay  \n",
       "43         Current           DirectPay      0     0          1  \n",
       "66         Current           DirectPay      0     0          1  \n",
       "68         Current           DirectPay      0     0          1  \n",
       "82         Current           DirectPay      0     0          1  \n",
       "88         Current           DirectPay      0     0          1  \n",
       "...            ...                 ...    ...   ...        ...  \n",
       "107717     Current           DirectPay      0     0          1  \n",
       "107751     Current           DirectPay      0     0          1  \n",
       "107797     Current           DirectPay      0     0          1  \n",
       "107822     Current           DirectPay      0     0          1  \n",
       "107854     Current           DirectPay      0     0          1  \n",
       "\n",
       "[13470 rows x 11 columns]"
      ]
     },
     "execution_count": 159,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[df['Cash']==0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 160,
   "id": "be785542",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>loan_amnt</th>\n",
       "      <th>term</th>\n",
       "      <th>int_rate</th>\n",
       "      <th>grade</th>\n",
       "      <th>home_ownership</th>\n",
       "      <th>emp_length</th>\n",
       "      <th>label</th>\n",
       "      <th>Cash</th>\n",
       "      <th>DirectPay</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>18500.0</td>\n",
       "      <td>36</td>\n",
       "      <td>13.58</td>\n",
       "      <td>C</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>7000.0</td>\n",
       "      <td>36</td>\n",
       "      <td>7.34</td>\n",
       "      <td>A</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>10</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>16000.0</td>\n",
       "      <td>60</td>\n",
       "      <td>11.98</td>\n",
       "      <td>B</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>28000.0</td>\n",
       "      <td>36</td>\n",
       "      <td>10.90</td>\n",
       "      <td>B</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>8300.0</td>\n",
       "      <td>36</td>\n",
       "      <td>7.34</td>\n",
       "      <td>A</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   loan_amnt  term  int_rate grade home_ownership  emp_length  label  Cash  \\\n",
       "0    18500.0    36     13.58     C       MORTGAGE           6      0     1   \n",
       "1     7000.0    36      7.34     A       MORTGAGE          10      0     1   \n",
       "2    16000.0    60     11.98     B       MORTGAGE           5      0     1   \n",
       "3    28000.0    36     10.90     B       MORTGAGE           6      0     1   \n",
       "4     8300.0    36      7.34     A       MORTGAGE           4      0     1   \n",
       "\n",
       "   DirectPay  \n",
       "0          0  \n",
       "1          0  \n",
       "2          0  \n",
       "3          0  \n",
       "4          0  "
      ]
     },
     "execution_count": 160,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# disbursement_method is already encoded by the Cash/DirectPay indicator columns;\n",
    "# remove it and loan_status in one non-mutating call instead of two inplace drops,\n",
    "# so frames displayed by earlier cells are not modified behind the reader's back.\n",
    "df = df.drop(columns=[\"disbursement_method\", \"loan_status\"])\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 161,
   "id": "ab29942f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'ANY', 'MORTGAGE', 'OWN', 'RENT'}"
      ]
     },
     "execution_count": 161,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "set(df.home_ownership)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 162,
   "id": "5322fcf6",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'A', 'B', 'C', 'D', 'E', 'F', 'G'}"
      ]
     },
     "execution_count": 162,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "set(df.grade)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 171,
   "id": "c43db11a",
   "metadata": {},
   "outputs": [],
   "source": [
    "def Grade2Value(x):\n",
    "    \"\"\"Score a letter grade by its distance below 'H': 'A' -> 7, 'B' -> 6, ..., 'G' -> 1.\"\"\"\n",
    "    upper_bound = ord('H')\n",
    "    return upper_bound - ord(x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 172,
   "id": "d71192e2",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "7"
      ]
     },
     "execution_count": 172,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "Grade2Value('A')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 173,
   "id": "53076853",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>loan_amnt</th>\n",
       "      <th>term</th>\n",
       "      <th>int_rate</th>\n",
       "      <th>grade</th>\n",
       "      <th>home_ownership</th>\n",
       "      <th>emp_length</th>\n",
       "      <th>label</th>\n",
       "      <th>Cash</th>\n",
       "      <th>DirectPay</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>18500.0</td>\n",
       "      <td>36</td>\n",
       "      <td>13.58</td>\n",
       "      <td>5</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>7000.0</td>\n",
       "      <td>36</td>\n",
       "      <td>7.34</td>\n",
       "      <td>7</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>10</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>16000.0</td>\n",
       "      <td>60</td>\n",
       "      <td>11.98</td>\n",
       "      <td>6</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>28000.0</td>\n",
       "      <td>36</td>\n",
       "      <td>10.90</td>\n",
       "      <td>6</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>8300.0</td>\n",
       "      <td>36</td>\n",
       "      <td>7.34</td>\n",
       "      <td>7</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   loan_amnt  term  int_rate  grade home_ownership  emp_length  label  Cash  \\\n",
       "0    18500.0    36     13.58      5       MORTGAGE           6      0     1   \n",
       "1     7000.0    36      7.34      7       MORTGAGE          10      0     1   \n",
       "2    16000.0    60     11.98      6       MORTGAGE           5      0     1   \n",
       "3    28000.0    36     10.90      6       MORTGAGE           6      0     1   \n",
       "4     8300.0    36      7.34      7       MORTGAGE           4      0     1   \n",
       "\n",
       "   DirectPay  \n",
       "0          0  \n",
       "1          0  \n",
       "2          0  \n",
       "3          0  \n",
       "4          0  "
      ]
     },
     "execution_count": 173,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Replace each letter grade with the integer score produced by Grade2Value.\n",
    "df['grade'] = df['grade'].apply(Grade2Value)\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 175,
   "id": "c2df033a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Remove the 'ANY' rows. Take an explicit copy so that later column assignments act on\n",
    "# an independent frame instead of a slice of the previous one, which is what makes\n",
    "# pandas emit SettingWithCopyWarning.\n",
    "df = df[df['home_ownership']!='ANY'].copy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 177,
   "id": "1d92121d",
   "metadata": {},
   "outputs": [],
   "source": [
    "def Home2Value(x):\n",
    "    \"\"\"Encode a home-ownership category as an integer code.\n",
    "\n",
    "    'MORTGAGE' -> 1, 'RENT' -> 2, 'OWN' -> 3; any other value yields None.\n",
    "    \"\"\"\n",
    "    codes = {'MORTGAGE': 1, 'RENT': 2, 'OWN': 3}\n",
    "    return codes.get(x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 178,
   "id": "728e7fda",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\Develop Tools\\anaconda\\envs\\ml\\lib\\site-packages\\ipykernel_launcher.py:1: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  \"\"\"Entry point for launching an IPython kernel.\n"
     ]
    }
   ],
   "source": [
    "# Swap each home-ownership category for the integer code returned by Home2Value.\n",
    "df['home_ownership'] = df['home_ownership'].apply(Home2Value)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 179,
   "id": "a8d2ac55",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>loan_amnt</th>\n",
       "      <th>term</th>\n",
       "      <th>int_rate</th>\n",
       "      <th>grade</th>\n",
       "      <th>home_ownership</th>\n",
       "      <th>emp_length</th>\n",
       "      <th>label</th>\n",
       "      <th>Cash</th>\n",
       "      <th>DirectPay</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>18500.0</td>\n",
       "      <td>36</td>\n",
       "      <td>13.58</td>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>7000.0</td>\n",
       "      <td>36</td>\n",
       "      <td>7.34</td>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "      <td>10</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>16000.0</td>\n",
       "      <td>60</td>\n",
       "      <td>11.98</td>\n",
       "      <td>6</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>28000.0</td>\n",
       "      <td>36</td>\n",
       "      <td>10.90</td>\n",
       "      <td>6</td>\n",
       "      <td>1</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>8300.0</td>\n",
       "      <td>36</td>\n",
       "      <td>7.34</td>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>6000.0</td>\n",
       "      <td>36</td>\n",
       "      <td>6.07</td>\n",
       "      <td>7</td>\n",
       "      <td>2</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>3125.0</td>\n",
       "      <td>36</td>\n",
       "      <td>21.85</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>10</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>3000.0</td>\n",
       "      <td>36</td>\n",
       "      <td>11.98</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "      <td>10</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>6500.0</td>\n",
       "      <td>36</td>\n",
       "      <td>10.90</td>\n",
       "      <td>6</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>15000.0</td>\n",
       "      <td>60</td>\n",
       "      <td>18.45</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>10</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    loan_amnt  term  int_rate  grade  home_ownership  emp_length  label  Cash  \\\n",
       "0     18500.0    36     13.58      5               1           6      0     1   \n",
       "1      7000.0    36      7.34      7               1          10      0     1   \n",
       "2     16000.0    60     11.98      6               1           5      0     1   \n",
       "3     28000.0    36     10.90      6               1           6      0     1   \n",
       "4      8300.0    36      7.34      7               1           4      0     1   \n",
       "5      6000.0    36      6.07      7               2           5      0     1   \n",
       "6      3125.0    36     21.85      4               1          10      0     1   \n",
       "7      3000.0    36     11.98      6               3          10      0     1   \n",
       "8      6500.0    36     10.90      6               1           1      0     1   \n",
       "10    15000.0    60     18.45      4               1          10      0     1   \n",
       "\n",
       "    DirectPay  \n",
       "0           0  \n",
       "1           0  \n",
       "2           0  \n",
       "3           0  \n",
       "4           0  \n",
       "5           0  \n",
       "6           0  \n",
       "7           0  \n",
       "8           0  \n",
       "10          0  "
      ]
     },
     "execution_count": 179,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 180,
   "id": "cdaa138b",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Int64Index: 196868 entries, 0 to 107863\n",
      "Data columns (total 9 columns):\n",
      " #   Column          Non-Null Count   Dtype  \n",
      "---  ------          --------------   -----  \n",
      " 0   loan_amnt       196868 non-null  float64\n",
      " 1   term            196868 non-null  int32  \n",
      " 2   int_rate        196868 non-null  float64\n",
      " 3   grade           196868 non-null  int64  \n",
      " 4   home_ownership  196868 non-null  int64  \n",
      " 5   emp_length      196868 non-null  int64  \n",
      " 6   label           196868 non-null  int64  \n",
      " 7   Cash            196868 non-null  uint8  \n",
      " 8   DirectPay       196868 non-null  uint8  \n",
      "dtypes: float64(2), int32(1), int64(4), uint8(2)\n",
      "memory usage: 15.7 MB\n"
     ]
    }
   ],
   "source": [
    "df.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 187,
   "id": "0298d9fd",
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>loan_amnt</th>\n",
       "      <th>term</th>\n",
       "      <th>int_rate</th>\n",
       "      <th>grade</th>\n",
       "      <th>home_ownership</th>\n",
       "      <th>emp_length</th>\n",
       "      <th>label</th>\n",
       "      <th>Cash</th>\n",
       "      <th>DirectPay</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>18500.0</td>\n",
       "      <td>36</td>\n",
       "      <td>13.58</td>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>7000.0</td>\n",
       "      <td>36</td>\n",
       "      <td>7.34</td>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "      <td>10</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>16000.0</td>\n",
       "      <td>60</td>\n",
       "      <td>11.98</td>\n",
       "      <td>6</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>28000.0</td>\n",
       "      <td>36</td>\n",
       "      <td>10.90</td>\n",
       "      <td>6</td>\n",
       "      <td>1</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>8300.0</td>\n",
       "      <td>36</td>\n",
       "      <td>7.34</td>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2845</th>\n",
       "      <td>7500.0</td>\n",
       "      <td>36</td>\n",
       "      <td>11.98</td>\n",
       "      <td>6</td>\n",
       "      <td>1</td>\n",
       "      <td>10</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2847</th>\n",
       "      <td>8000.0</td>\n",
       "      <td>36</td>\n",
       "      <td>7.34</td>\n",
       "      <td>7</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2848</th>\n",
       "      <td>10000.0</td>\n",
       "      <td>36</td>\n",
       "      <td>6.71</td>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "      <td>7</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2849</th>\n",
       "      <td>40000.0</td>\n",
       "      <td>60</td>\n",
       "      <td>12.61</td>\n",
       "      <td>5</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2850</th>\n",
       "      <td>40000.0</td>\n",
       "      <td>36</td>\n",
       "      <td>7.34</td>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2600 rows × 9 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      loan_amnt  term  int_rate  grade  home_ownership  emp_length  label  \\\n",
       "0       18500.0    36     13.58      5               1           6      0   \n",
       "1        7000.0    36      7.34      7               1          10      0   \n",
       "2       16000.0    60     11.98      6               1           5      0   \n",
       "3       28000.0    36     10.90      6               1           6      0   \n",
       "4        8300.0    36      7.34      7               1           4      0   \n",
       "...         ...   ...       ...    ...             ...         ...    ...   \n",
       "2845     7500.0    36     11.98      6               1          10      0   \n",
       "2847     8000.0    36      7.34      7               2           2      0   \n",
       "2848    10000.0    36      6.71      7               1           7      0   \n",
       "2849    40000.0    60     12.61      5               3           2      0   \n",
       "2850    40000.0    36      7.34      7               1           4      0   \n",
       "\n",
       "      Cash  DirectPay  \n",
       "0        1          0  \n",
       "1        1          0  \n",
       "2        1          0  \n",
       "3        1          0  \n",
       "4        1          0  \n",
       "...    ...        ...  \n",
       "2845     0          1  \n",
       "2847     1          0  \n",
       "2848     0          1  \n",
       "2849     1          0  \n",
       "2850     1          0  \n",
       "\n",
       "[2600 rows x 9 columns]"
      ]
     },
     "execution_count": 187,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "positive = df[df['label']==0].iloc[:2600,:]\n",
    "positive"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 188,
   "id": "4079e0b9",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>loan_amnt</th>\n",
       "      <th>term</th>\n",
       "      <th>int_rate</th>\n",
       "      <th>grade</th>\n",
       "      <th>home_ownership</th>\n",
       "      <th>emp_length</th>\n",
       "      <th>label</th>\n",
       "      <th>Cash</th>\n",
       "      <th>DirectPay</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>214</th>\n",
       "      <td>6000.0</td>\n",
       "      <td>36</td>\n",
       "      <td>20.39</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>276</th>\n",
       "      <td>12000.0</td>\n",
       "      <td>60</td>\n",
       "      <td>11.98</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "      <td>10</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>499</th>\n",
       "      <td>6000.0</td>\n",
       "      <td>36</td>\n",
       "      <td>9.43</td>\n",
       "      <td>6</td>\n",
       "      <td>2</td>\n",
       "      <td>6</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>547</th>\n",
       "      <td>5000.0</td>\n",
       "      <td>36</td>\n",
       "      <td>18.45</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>655</th>\n",
       "      <td>1500.0</td>\n",
       "      <td>36</td>\n",
       "      <td>20.39</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>6</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>107558</th>\n",
       "      <td>32200.0</td>\n",
       "      <td>60</td>\n",
       "      <td>16.02</td>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>107571</th>\n",
       "      <td>10000.0</td>\n",
       "      <td>36</td>\n",
       "      <td>9.44</td>\n",
       "      <td>6</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>107755</th>\n",
       "      <td>16000.0</td>\n",
       "      <td>36</td>\n",
       "      <td>20.00</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>107810</th>\n",
       "      <td>35175.0</td>\n",
       "      <td>36</td>\n",
       "      <td>17.09</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>10</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>107819</th>\n",
       "      <td>10000.0</td>\n",
       "      <td>36</td>\n",
       "      <td>16.02</td>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2392 rows × 9 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        loan_amnt  term  int_rate  grade  home_ownership  emp_length  label  \\\n",
       "214        6000.0    36     20.39      4               2           2      1   \n",
       "276       12000.0    60     11.98      6               3          10      1   \n",
       "499        6000.0    36      9.43      6               2           6      1   \n",
       "547        5000.0    36     18.45      4               1           1      1   \n",
       "655        1500.0    36     20.39      4               2           6      1   \n",
       "...           ...   ...       ...    ...             ...         ...    ...   \n",
       "107558    32200.0    60     16.02      5               2           3      1   \n",
       "107571    10000.0    36      9.44      6               2           0      1   \n",
       "107755    16000.0    36     20.00      4               2           2      1   \n",
       "107810    35175.0    36     17.09      4               1          10      1   \n",
       "107819    10000.0    36     16.02      5               2           5      1   \n",
       "\n",
       "        Cash  DirectPay  \n",
       "214        1          0  \n",
       "276        1          0  \n",
       "499        1          0  \n",
       "547        1          0  \n",
       "655        1          0  \n",
       "...      ...        ...  \n",
       "107558     1          0  \n",
       "107571     1          0  \n",
       "107755     1          0  \n",
       "107810     1          0  \n",
       "107819     1          0  \n",
       "\n",
       "[2392 rows x 9 columns]"
      ]
     },
     "execution_count": 188,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "nagative = df[df['label']==1]\n",
    "nagative"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 189,
   "id": "c4faf9e0",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>loan_amnt</th>\n",
       "      <th>term</th>\n",
       "      <th>int_rate</th>\n",
       "      <th>grade</th>\n",
       "      <th>home_ownership</th>\n",
       "      <th>emp_length</th>\n",
       "      <th>label</th>\n",
       "      <th>Cash</th>\n",
       "      <th>DirectPay</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>18500.0</td>\n",
       "      <td>36</td>\n",
       "      <td>13.58</td>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>7000.0</td>\n",
       "      <td>36</td>\n",
       "      <td>7.34</td>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "      <td>10</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>16000.0</td>\n",
       "      <td>60</td>\n",
       "      <td>11.98</td>\n",
       "      <td>6</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>28000.0</td>\n",
       "      <td>36</td>\n",
       "      <td>10.90</td>\n",
       "      <td>6</td>\n",
       "      <td>1</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>8300.0</td>\n",
       "      <td>36</td>\n",
       "      <td>7.34</td>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>107558</th>\n",
       "      <td>32200.0</td>\n",
       "      <td>60</td>\n",
       "      <td>16.02</td>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>107571</th>\n",
       "      <td>10000.0</td>\n",
       "      <td>36</td>\n",
       "      <td>9.44</td>\n",
       "      <td>6</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>107755</th>\n",
       "      <td>16000.0</td>\n",
       "      <td>36</td>\n",
       "      <td>20.00</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>107810</th>\n",
       "      <td>35175.0</td>\n",
       "      <td>36</td>\n",
       "      <td>17.09</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>10</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>107819</th>\n",
       "      <td>10000.0</td>\n",
       "      <td>36</td>\n",
       "      <td>16.02</td>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>4992 rows × 9 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        loan_amnt  term  int_rate  grade  home_ownership  emp_length  label  \\\n",
       "0         18500.0    36     13.58      5               1           6      0   \n",
       "1          7000.0    36      7.34      7               1          10      0   \n",
       "2         16000.0    60     11.98      6               1           5      0   \n",
       "3         28000.0    36     10.90      6               1           6      0   \n",
       "4          8300.0    36      7.34      7               1           4      0   \n",
       "...           ...   ...       ...    ...             ...         ...    ...   \n",
       "107558    32200.0    60     16.02      5               2           3      1   \n",
       "107571    10000.0    36      9.44      6               2           0      1   \n",
       "107755    16000.0    36     20.00      4               2           2      1   \n",
       "107810    35175.0    36     17.09      4               1          10      1   \n",
       "107819    10000.0    36     16.02      5               2           5      1   \n",
       "\n",
       "        Cash  DirectPay  \n",
       "0          1          0  \n",
       "1          1          0  \n",
       "2          1          0  \n",
       "3          1          0  \n",
       "4          1          0  \n",
       "...      ...        ...  \n",
       "107558     1          0  \n",
       "107571     1          0  \n",
       "107755     1          0  \n",
       "107810     1          0  \n",
       "107819     1          0  \n",
       "\n",
       "[4992 rows x 9 columns]"
      ]
     },
     "execution_count": 189,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "new_df = pd.concat([positive,nagative],axis=0)\n",
    "new_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 191,
   "id": "a78639f1",
   "metadata": {},
   "outputs": [],
   "source": [
    "new_df.to_csv(\"train.csv\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
